#!/usr/bin/env python
# coding=utf-8
# __author__ = 'Yunchao Ling'

# Merge the numbered "pmc_id_<N>.xml" article-set files into a single
# <pmc-articleset> document.

OUTPUT_PATH = "pmc_meta_20150722.xml"
FIRST_INDEX = 1
LAST_INDEX = 367  # inclusive; inputs are pmc_id_1.xml .. pmc_id_367.xml

ROOT_OPEN = "<pmc-articleset>"
ROOT_CLOSE = "</pmc-articleset>"

XML_HEADER = (
    "<?xml version=\"1.0\"?>\n"
    "<!DOCTYPE pmc-articleset PUBLIC \"-//NLM//DTD ARTICLE SET 2.0//EN\" \"http://dtd.nlm.nih.gov/ncbi/pmc/articleset/nlm-articleset-2.0.dtd\">\n"
)


def clean_line(line):
    """Return the part of *line* that belongs in the merged file.

    Trailing whitespace is removed, a leading ``<pmc-articleset>`` tag and a
    trailing ``</pmc-articleset>`` tag are stripped (the merged document
    supplies its own single root element), and ``None`` is returned when
    nothing is left to write.
    """
    line = line.rstrip()
    if line.startswith(ROOT_OPEN):
        line = line[len(ROOT_OPEN):]
    # The closing root tag may share a line with other markup (or with the
    # opening tag when the set is empty); it must never reach the output,
    # otherwise the merged document would close its root element early.
    if line.endswith(ROOT_CLOSE):
        line = line[:-len(ROOT_CLOSE)].rstrip()
    return line if line else None


def append_articles(input_path, outfile):
    """Copy the article content of *input_path* into the open *outfile*.

    The first two lines of the input are skipped unconditionally
    (presumably the XML declaration and DOCTYPE, which the merged file
    writes once itself -- verify against the input files). Every remaining
    line is passed through ``clean_line`` and written followed by a newline
    unless it is dropped.
    """
    with open(input_path, "r") as infile:
        infile.readline()
        infile.readline()
        for raw in infile:
            cleaned = clean_line(raw)
            if cleaned is not None:
                outfile.write(cleaned + "\n")


def main():
    """Write the merged article set to ``OUTPUT_PATH``."""
    # ``with`` guarantees the output is flushed and closed even when an
    # input file is missing, so no per-line flush is needed.
    with open(OUTPUT_PATH, "w") as outfile:
        outfile.write(XML_HEADER)
        outfile.write(ROOT_OPEN + "\n")
        # ``range`` works on both Python 2 and 3; the span is small enough
        # that materialising it on Python 2 is irrelevant.
        for i in range(FIRST_INDEX, LAST_INDEX + 1):
            append_articles("pmc_id_" + str(i) + ".xml", outfile)
            print(str(i) + " completed.")
        outfile.write(ROOT_CLOSE + "\n")


if __name__ == "__main__":
    main()